Plot the figure panels relating to the CRISPR deletion of Sox2, from the sorting data as well as the sorted pools & clones.
To re-run this code, change the paths in ‘File paths’ to the correct location of datasets. The file paths of the fcs files are hardcoded in the annotation of the clones (). To re-run the analysis of those clones, adapt the chunk ‘load_fcs_data_clones’ to point directly to the path you stored these fcs files (paste0(/your/own/path/,fcs_files_E2350$filename)).
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks BiocGenerics::combine()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# library(dtplyr)
library(dplyr)
# library(tidyr)
library(ggbeeswarm)
library(ggplot2)
library(Biostrings)## Loading required package: XVector
##
## Attaching package: 'XVector'
##
## The following object is masked from 'package:purrr':
##
## compact
##
##
## Attaching package: 'Biostrings'
##
## The following object is masked from 'package:base':
##
## strsplit
library(stringr)
library(readr)
library(knitr)
library(RColorBrewer)
library(RcppRoll)
# library(plotly)
library(readxl)
library(ggpubr)
library(ggpmisc)## Loading required package: ggpp
## Registered S3 methods overwritten by 'ggpp':
## method from
## heightDetails.titleGrob ggplot2
## widthDetails.titleGrob ggplot2
##
## Attaching package: 'ggpp'
##
## The following objects are masked from 'package:ggpubr':
##
## as_npc, as_npcx, as_npcy
##
## The following object is masked from 'package:ggplot2':
##
## annotate
##
## Registered S3 method overwritten by 'ggpmisc':
## method from
## as.character.polynomial polynom
## The import package should not be attached.
## Use "colon syntax" instead, e.g. import::from, or import:::from.
##
## Attaching package: 'import'
##
## The following object is masked from 'package:IRanges':
##
## from
##
## The following object is masked from 'package:S4Vectors':
##
## from
##
## Attaching package: 'ggh4x'
##
## The following object is masked from 'package:ggpp':
##
## stat_centroid
import::from(flowCore, .except=c("filter")) #I don't load the whole flowCore and ggcyto libraries, because they overwrite dplyr's filter function :0
import::from(ggcyto, 'fortify_fs')
library(flowDensity)
#to install this you need to manually install RFOC first:
#install_url('https://cran.r-project.org/src/contrib/Archive/RFOC/RFOC_3.4-6.tar.gz')
library(caTools)##
## Attaching package: 'caTools'
##
## The following object is masked from 'package:IRanges':
##
## runmean
##
## The following object is masked from 'package:S4Vectors':
##
## runmean
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:ggpubr':
##
## get_legend
##
## The following object is masked from 'package:lubridate':
##
## stamp
#datatag
datetag = paste0("CM",format(Sys.time(), '%Y%m%d'))
# Prepare output
output_dir <- paste0("/DATA/projects/Sox2/Figure_Sox2_CRISPR/analysis_",datetag)
dir.create(output_dir, showWarnings = FALSE)library(knitr)
opts_chunk$set(cache = T,
message = F,
warning = F,
dev=c('png', 'pdf'),
dpi = 600,
fig.path = file.path(output_dir, "figures/"))
pdf.options(useDingbats = FALSE)path_CTCF_sites = "/DATA/usr/v.franceschini/Workspaces/2024_01_MATHIAS_CTCF_PEAKS/VF240812_calling_CTCF_motifs_again/02_OUTPUTS/CTCF_sites/6764_2_ME_E2_CGATGT_S2_R1_001_peaks_motifs.bed"
path_fcs_E2211 = '/DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/E2211_CRISPR_Sox2_del_1/fcs_for_R'
# E2263 WT samples (for negative cutoffs)
path_fcs_WT_E2263_1 = "/DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/E2263_CRISPR_Sox2_del_2/fcs_for_R/export_me20230627_E2263_WT_neg_ctrl_001_Single Cells.fcs"
path_fcs_WT_E2263_2 = "/DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/E2263_CRISPR_Sox2_del_2/fcs_for_R/export_me20230630_E2263_WT_neg_ctrl_001_Single Cells.fcs"
path_annotation_clones_E2350 = "/DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/E2350_generating_CRISPR_clones/CM20240221_E2350_fcs_files_tib.rds"
#hopping data
path_tib_23_ints = "/DATA/projects/Sox2/GEO_collection/Tagmentation/mapped_integrations_-116kb_Sox2P_pools.txt"
path_tib_C9_mChpos_ints = "/DATA/projects/Sox2/GEO_collection/Tagmentation/mapped_integrations_-161kb_Sox2P_pools.txt"
path_tib_C9_mChneg_ints = "/DATA/projects/Sox2/GEO_collection/Tagmentation/mapped_integrations_-161kb_Sox2mCherrydel_Sox2P_pools.txt"
#sorting data hopping
diva_xml_file = "/DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/E2534_hopping_C9_LP/FACS/Mathias (BvS)20240501_E2534_rep1.xml"
path_FACS_sorting_E2555 = "/DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/E2534_hopping_C9_LP/FACS"
#functions
source("/DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/Combined_for_paper/general_functions/CM20230814_general_plotting_functions.R")gene etc
Sox2_gene <- GRanges(seqnames = "chr3",
ranges = IRanges(start = 34649995,
end = 34652461),
strand = "+")
enhancer <- GRanges(seqnames = "chr3",
ranges = IRanges(start = 34753415,
end = 34766401),
strand = "*")
#CTCF sites based on our own mapping
CTCF_mm10 <- import.bed(path_CTCF_sites)
CTCF_mm10.chr3 <- CTCF_mm10[seqnames(CTCF_mm10)== 'chr3']
# add the missing site after the SCR
CTCF_mm10.chr3_extra = sort(c(CTCF_mm10.chr3,
GRanges(seqnames = "chr3",
IRanges(start = 34772210, end = 34772210),
strand = "+")),
ignore.strand = T)colors
colors_gates = c("black", "grey60", "#4d6b53", "#e7298a")
colscale_gates = scale_colour_manual(name = "gate_for_color", values = colors_gates,
aesthetics = c("color", "fill"))
colors_clones = c(`negative control` = "grey60", untreated = "black", delA = '#f680af', delB = '#d43c8a')
colscale_clones = scale_colour_manual(name = "treatment_for_color", values = colors_clones,
aesthetics = c("color", "fill"))
## blue colors beeswarm (mChpos_34)
myCols_blue = c('#525252', "#080E5B", "#212B71", "#3A4987", "#465893", "#5A719A", "#6C8AA0")
names(myCols_blue) = c("ctrl", paste0("P", 1:6))
colScale7_blue <- scale_colour_manual(name = "population", values = myCols_blue)
fillScale7_blue <- scale_fill_manual(name = "population", values = myCols_blue)
## pink colors beeswarm (mChneg_34)
myCols_pink = c('#525252', "#591437", "#9c2261", "#c92c7c", "#d43c8a", "#e58cb9", "#cb8cac")
names(myCols_pink) = c("ctrl", paste0("P", 1:6))
cols_CL_pop = c(myCols_blue, myCols_pink)
names(cols_CL_pop) = c(paste0("mChpos_34_", names(myCols_blue)),
paste0("C9_mCh-_34_" , names(myCols_pink) ))
colScale_pop_CL = scale_colour_manual(name = "CL_population", values = cols_CL_pop)
## SCR
brown = "#ffb000"Exact coordinates landing pads
fcs_files_E2211 = dir(path_fcs_E2211,
full.names = T)
fcs_files_E2211_tib = tibble(file = fcs_files_E2211) %>%
mutate(sample_name_tmp = split_string(str_remove(file, '.*/'), "_", 4, 7)) %>%
mutate(cell_line = case_when(grepl("23_34A", sample_name_tmp) ~ "23_34A",
grepl("23_C9", sample_name_tmp) ~ "C9_34",
grepl("8_34_8", sample_name_tmp) ~ "8_34"),
treatment = case_when(grepl("ctrl", sample_name_tmp) ~ "untreated",
grepl("Del-A", sample_name_tmp) ~ "delA",
grepl("Del-B", sample_name_tmp) ~ "delB"),
timepoint = "sorting",
experiment = "E2211",
flowcytometer = "sorter",
sample_name = paste0(experiment, "_", cell_line, "_", treatment),
date = "me20230317"
) %>%
mutate(fcs_filter = case_when(grepl("Single Cells.fcs", file) ~ "SingleCells",
grepl("Single Cells.fcs", file) ~ "SingleCells2")) %>%
filter(fcs_filter == "SingleCells") #use equivalent filtering to the other samples (only filter for doublets once)fcs_annotation_tib = bind_rows(fcs_files_E2211_tib) %>%
mutate(cell_line = factor(cell_line, c("C9_34", "23_34A", "8_34", "WT"))) %>%
mutate(landing_pad = case_when(cell_line == "WT" ~ "none",
.default = split_string(cell_line, "_", 1))) %>%
mutate(landing_pad = factor(landing_pad, levels = c("none", "C9", "23", "8"),
labels = c("no insert", "-161 kb", "-116 kb", "-39 kb")),
treatment = factor(treatment, levels = c("untreated", "delA", "delB"),
labels = c("untreated", "ΔSox2", "ΔCBS_Sox2"))) %>%
select(-c(fcs_filter))fcs_annotation_df = data.frame(fcs_annotation_tib)
rownames(fcs_annotation_df) = fcs_annotation_df$sample_name
flowset = flowCore::read.flowSet(files = fcs_annotation_df$file, alter.names = T, truncate_max_range = F,
ignore.text.offset = T,
column.pattern = 'BL.B..530_30.A|YG.D..610_20.A|V.F..450_50.A|FSC.A|SSC.A'
#samples from sorter have different fluorophores, only take the shared ones
)
flowCore::sampleNames(flowset) = fcs_annotation_df$sample_name
pData(flowset) = fcs_annotation_df
flowset_fluo = flowset[,c('BL.B..530_30.A', 'YG.D..610_20.A', 'V.F..450_50.A')]
colnames(flowset_fluo) = c("GFP", "mCherry", "mTurq")
rm(flowset)total_cell_count_tmp = flowCore::keyword(flowset_fluo, "$TOT")[,'$TOT']
total_cell_count = as.numeric(total_cell_count_tmp)
names(total_cell_count) = names(total_cell_count_tmp)
measurement_date = flowCore::keyword(flowset_fluo, "$DATE")[,'$DATE']
median_mTurq = sapply(1:length(flowset_fluo), function(i){
median(exprs(flowset_fluo[[i]])[,'mTurq'])
})
names(median_mTurq) = sampleNames(flowset_fluo)
median_mCherry = sapply(1:length(flowset_fluo), function(i){
median(exprs(flowset_fluo[[i]])[,'mCherry'])
})
names(median_mCherry) = sampleNames(flowset_fluo)
median_GFP = sapply(1:length(flowset_fluo), function(i){
median(exprs(flowset_fluo[[i]])[,'GFP'])
})
names(median_GFP) = sampleNames(flowset_fluo)
pData(flowset_fluo)$median_mTurq = median_mTurq
pData(flowset_fluo)$median_mCherry = median_mCherry
pData(flowset_fluo)$median_GFP = median_GFP
pData(flowset_fluo)$total_cell_count = total_cell_count
pData(flowset_fluo)$measurement_date = measurement_date #date from the fcs file, must be correct
flowset_fluo_data_tib = tibble(pData(flowset_fluo))#load WT sames from E2263 (also sorter) to set GFP-/mCh- cutoffs
flowset_WT_ctrls = flowCore::read.flowSet(files = c(path_fcs_WT_E2263_1, path_fcs_WT_E2263_2), alter.names = T, truncate_max_range = F)[,c('BL.B..530_30.A', 'YG.D..610_20.A', 'V.F..450_50.A', 'FSC.A', 'SSC.A')]
colnames(flowset_WT_ctrls) = c("GFP", "mCherry", "mTurq",'FSC', 'SSC')
mCh_neg_99_ls = lapply(1:2, function(i){
quantile(exprs(flowset_WT_ctrls[[i]][,'mCherry']), probs = 0.99)
})
mCh_neg_99 = mean(unlist(mCh_neg_99_ls)) #cut-off for mCherry negative
GFP_neg_99_ls = lapply(1:2, function(i){
quantile(exprs(flowset_WT_ctrls[[i]][,'GFP']), probs = 0.99)
})
GFP_neg_99 = mean(unlist(GFP_neg_99_ls)) #cut-off for GFP negative
mT_neg_99_ls = lapply(1:2, function(i){
quantile(exprs(flowset_WT_ctrls[[i]][,'mTurq']), probs = 0.99)
})
mT_neg_99 = mean(unlist(mT_neg_99_ls)) #cut-off for mTurq negativeUse 1.5x the border of the WT sample as a cutoff for the GFP+ and mCh+ gates (so we have some space)
dotplot_23_delB_GFP_mCh = ggplot(flowset_fluo[3], aes(x = GFP, y= mCherry, fill = after_stat(ncount))) +#if you want each separate panel to end in the highest color
facet_nested(landing_pad + treatment ~ date) +
theme_classic() +
geom_hex(bins = 128) +
scale_fill_distiller(palette = 'Spectral') +
ggcyto::scale_x_flowjo_biexp(widthBasis = -100, breaks = c(-10^3, 0, 10^3, 10^4, 10^5),
pos = 4.42) +
ggcyto::scale_y_flowjo_biexp(widthBasis = -100, breaks = c(-10^3, 0, 10^3, 10^4, 10^5),
pos = 4.42) +
geom_hline(yintercept = c(mCh_neg_99, mCh_neg_99*1.5)) +
geom_vline(xintercept = c(GFP_neg_99, GFP_neg_99*1.5)) +
theme(aspect.ratio = 1,
legend.position = 'none' )
# ggsave('/DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/Combined_for_paper/Manuscript_figures/Sox2_CRISPR/CM20240301_FACS_GFP_mCherry_example.pdf' , dotplot_23_delB_GFP_mCh, width = 7, units = 'cm' )Use 1.5x the border of the WT sample as a cutoff for the GFP+ and mCh+ gates (so we have some space)
double_posGate <- rectangleGate(filterId="GFP+ mCh+",
"GFP"=c(1.5*GFP_neg_99, Inf), "mCherry"=c(1.5*mCh_neg_99, Inf))
mCh_negGate <- rectangleGate(filterId="GFP+ mCh-",
"GFP"=c(1.5*GFP_neg_99, Inf), "mCherry"=c(-Inf, mCh_neg_99))
GFP_negGate <- rectangleGate(filterId="GFP- mCh+",
"GFP"=c(-Inf, GFP_neg_99), "mCherry"=c(1.5*mCh_neg_99, Inf))
doublepos_fcs = flowCore::Subset(flowset_fluo, double_posGate)
pData(doublepos_fcs)$gate = "doublepos"
sampleNames(doublepos_fcs) = paste0(sampleNames(doublepos_fcs), "_", pData(doublepos_fcs)$gate)
mChneg_fcs = flowCore::Subset(flowset_fluo, mCh_negGate)
pData(mChneg_fcs)$gate = "mChneg"
sampleNames(mChneg_fcs) = paste0(sampleNames(mChneg_fcs), "_", pData(mChneg_fcs)$gate)
GFPneg_fcs = flowCore::Subset(flowset_fluo, GFP_negGate)
pData(GFPneg_fcs)$gate = "GFPneg"
sampleNames(GFPneg_fcs) = paste0(sampleNames(GFPneg_fcs), "_", pData(GFPneg_fcs)$gate)
gated_fcs = rbind2(doublepos_fcs, mChneg_fcs)
gated_fcs = rbind2(gated_fcs, GFPneg_fcs)
cell_count_gate = sapply(1:length(gated_fcs), function(i){
nrow(gated_fcs[[i]])
})
pData(gated_fcs)$cell_count_gate = cell_count_gategated_samples_oi = pData(gated_fcs)$treatment %in% c("ΔSox2", "ΔCBS_Sox2")|
pData(gated_fcs)$gate == "doublepos" #for untreated the GFPneg and mChneg gates are meaningless
gated_samples_oi_rep1 = (pData(gated_fcs)$treatment %in% c("ΔSox2", "ΔCBS_Sox2")| pData(gated_fcs)$gate == "doublepos") &
pData(gated_fcs)$experiment == "E2211"
# we don't need the duplicates sample if we use ridgeline plots
#still do need to add a factor for the colors
gated_fcs_2 = gated_fcs
pData(gated_fcs_2)$gate_for_color = case_when(pData(gated_fcs_2)$treatment == "untreated" ~
paste0(pData(gated_fcs_2)$gate, "_untreated"),
.default = pData(gated_fcs_2)$gate)
pData(gated_fcs_2)$gate_for_color = factor(pData(gated_fcs_2)$gate_for_color,
levels = c("doublepos_untreated","doublepos", "GFPneg", "mChneg"))
#plot rep1
cell_count_tib_rep1 =
tibble(pData(gated_fcs_2[gated_samples_oi_rep1])) %>%
select(experiment, gate, cell_line,gate_for_color, treatment, cell_count_gate, date)
#regular density plot
dens_plot_active_LP_gated = ggplot(gated_fcs_2[gated_samples_oi_rep1] ,
aes(x = mTurq, fill = gate_for_color, col = gate_for_color)) +
# facet_nested(experiment + treatment ~ cell_line, scale = "free_y") +
facet_nested(treatment ~ cell_line, scale = "free_y", space = 'free_y') +
geom_density_ridges(aes(y = fct_reorder(gate_for_color, as.integer(gate_for_color), .desc= T)),
size =0.3, # rel_min_height = 0.005,
scale = 1.5,
alpha = 0.50) + #,
ggcyto::scale_x_flowjo_biexp(widthBasis = -100, breaks = c(-10^3, 0, 10^3, 10^4, 10^5), limits = c(-10^3, 5*10^5),
pos = 4.42 ) +
scale_y_discrete(expand = expansion( add = c(0, 1.7))) +
geom_text_npc(data = cell_count_tib_rep1, aes(label = cell_count_gate,
npcy = 1.2-(as.integer(gate_for_color)/4), col = gate_for_color),
npcx = 0.95, hjust = "inward") +
theme_bw(base_size = 14) +
theme(axis.title.y = element_blank(),
legend.position = 'none') +
colscale_gates
dens_plot_active_LP_gated# ggsave( "/DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/Combined_for_paper/Manuscript_figures/Sox2_CRISPR/CM20240304_FACS_density_gates_active_LPs_E2211.pdf", dens_plot_active_LP_gated, width = 20, height = 20, units = "cm")# plot only untreated plus WT ctrl for sup
gated_samples_oi_rep1_untreated = (pData(gated_fcs)$treatment == "untreated" &
pData(gated_fcs)$experiment == "E2211" &
pData(gated_fcs)$gate == "doublepos")
pData(gated_fcs)[gated_samples_oi_rep1_untreated,]## file
## E2211_23_34A_untreated_doublepos /DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/E2211_CRISPR_Sox2_del_1/fcs_for_R/export_Mathias (BvS) ME20230317_E2211_23_34A_ctrl_001_Single Cells.fcs
## E2211_C9_34_untreated_doublepos /DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/E2211_CRISPR_Sox2_del_1/fcs_for_R/export_Mathias (BvS) ME20230317_E2211_23_C9_ctrl_003_Single Cells.fcs
## E2211_8_34_untreated_doublepos /DATA/usr/c.moene/projects/Hopping_Sox2_mTurq/E2211_CRISPR_Sox2_del_1/fcs_for_R/export_Mathias (BvS) ME20230317_E2211_8_34_8_ctrl_002_Single Cells.fcs
## sample_name_tmp cell_line treatment timepoint
## E2211_23_34A_untreated_doublepos 23_34A_ctrl_001 23_34A untreated sorting
## E2211_C9_34_untreated_doublepos 23_C9_ctrl_003 C9_34 untreated sorting
## E2211_8_34_untreated_doublepos 8_34_8_ctrl 8_34 untreated sorting
## experiment flowcytometer
## E2211_23_34A_untreated_doublepos E2211 sorter
## E2211_C9_34_untreated_doublepos E2211 sorter
## E2211_8_34_untreated_doublepos E2211 sorter
## sample_name date landing_pad
## E2211_23_34A_untreated_doublepos E2211_23_34A_untreated me20230317 -116 kb
## E2211_C9_34_untreated_doublepos E2211_C9_34_untreated me20230317 -161 kb
## E2211_8_34_untreated_doublepos E2211_8_34_untreated me20230317 -39 kb
## name median_mTurq
## E2211_23_34A_untreated_doublepos E2211_23_34A_untreated 1027.790
## E2211_C9_34_untreated_doublepos E2211_C9_34_untreated 224.360
## E2211_8_34_untreated_doublepos E2211_8_34_untreated 2167.365
## median_mCherry median_GFP total_cell_count
## E2211_23_34A_untreated_doublepos 8545.811 16155.36 50267
## E2211_C9_34_untreated_doublepos 8255.521 15446.08 50081
## E2211_8_34_untreated_doublepos 9754.290 16556.80 49904
## measurement_date gate cell_count_gate
## E2211_23_34A_untreated_doublepos 17-MAR-2023 doublepos 50198
## E2211_C9_34_untreated_doublepos 17-MAR-2023 doublepos 49993
## E2211_8_34_untreated_doublepos 17-MAR-2023 doublepos 49784
dens_plot_LP_untreated_gated =
ggplot(gated_fcs[gated_samples_oi_rep1_untreated],
aes(x = mTurq)) +
#facet_nested(experiment + treatment ~ cell_line, scale = "free_y") +
#facet_nested(cell_line ~ treatment, scale = "free_y", space = 'free_y') +
geom_density_ridges(aes(y = cell_line, fill = "black"),
size =0.3, # rel_min_height = 0.005,
scale = 1.5,
alpha = 0.50,
quantile_lines = TRUE, quantiles = 2) +
ggcyto::scale_x_flowjo_biexp(widthBasis = -100, breaks = c(-10^3, 0, 10^3, 10^4, 10^5), limits = c(-10^3, 1*10^5),
pos = 4.42 ) +
scale_y_discrete(expand = expansion(add = c(0, 0.8))) +
# geom_text_npc(data = cell_count_tib_rep1, aes(label = cell_count_gate,
# npcy = 1.2-(as.integer(gate_for_color)/4), col = gate_for_color),
# npcx = 0.95, hjust = "inward") +
theme_bw(base_size = 14) +
theme(axis.title.y = element_blank(),
legend.position = 'none') +
colscale_gates
dens_plot_LP_untreated_gatedfcs_files_E2350 = readRDS(path_annotation_clones_E2350) %>%
#remove the WT samples that were measured on a different day (labeled here as E2250, but were actually measured in E2254)
filter(!name %in% c(paste0('E2250_WT_untreated_none_E', 10:12))) %>%
mutate(flow_experiment = experiment) %>%
filter(!cell_line == "C10_34" & !cell_line == "C1_34") %>%
mutate(GFP_state = case_when(sorted_phenotype == "GFP-" ~ "GFP-",
cell_line == "WT" ~ "GFP-",
.default = "GFP+"),
mCh_state = case_when(sorted_phenotype %in% c("mCh-_mT-", "mCh-_mT+", "mCh-_NA") ~ "mCh-",
cell_line == "WT" ~ "mCh-",
.default = "mCh+")) %>%
mutate(expected_insert = case_when(cell_line %in% c("23_34A", "8_34.8", "C9_34") ~ "34",
.default = "none")) %>%
mutate(landing_pad = case_when(expected_insert == "34" ~ str_replace(cell_line, "_.*$", ""),
.default = "none")) %>%
mutate(control_samples = case_when(cell_line %in% c("WT", "F121-9") ~ cell_line,
cell_line == "23_34A" & treatment == "untreated" ~ "23_34A")) %>%
mutate(type = case_when(type == "pool" ~ "sorted_pool",
.default = type)) %>%
mutate(sorted_phenotype = case_when(sorted_phenotype == "mCh-_NA" ~ "mCh-_mT+", #correct the pools which were wrongly labeled as mCh-_NA
.default = sorted_phenotype)) %>%
#only include remeasurements, but also from the sorter (for E2263)
filter(timepoint == "remeasurement") %>%
mutate(exp_name = paste0(experiment, "_", sample_name)) %>%
mutate(cell_line_clone_name = case_when(type == "control" ~ cell_line,
type == "clone" ~ paste0(cell_line, "_", treatment, "_", clone_name))) %>%
mutate(treatment_for_color = case_when(cell_line %in% c("WT", "F121-9") ~ "negative control",
.default = treatment))%>%
mutate(cell_line_treatment = paste0(cell_line, "_", treatment)) %>%
#only include the treatments & sortings relevant here:
filter(treatment %in% c('untreated', 'delA', 'delB') &
sorted_phenotype %in% c("mCh-_mT+", "none")) %>%
mutate(treatment = factor(treatment, levels = c('untreated', 'delA', 'delB'))) %>%
#for deletions only include correctly genotyped clones
filter((is.na(PCR_band_deletion) | PCR_band_deletion) &
(is.na(PCR_band_intact_allele) | PCR_band_intact_allele)) %>%
#add short filename
mutate(filename = str_remove(file, '^.*/'))fcs_annotation_E2350_df = data.frame(fcs_files_E2350)
rownames(fcs_annotation_E2350_df) = fcs_files_E2350$exp_name
flowset = flowCore::read.flowSet(files = fcs_annotation_E2350_df$file, alter.names = T, truncate_max_range = F,
ignore.text.offset = T,
column.pattern = 'BL.B..530_30.A|YG.D..610_20.A|V.F..450_50.A|FSC.A|SSC.A'
#samples from sorter have different fluorophores, only take the shared ones
)
## to run this when files are in a different folder (all in one folder, called MyFolder), run:
# flowset = flowCore::read.flowSet(files = paste0(MyFolder, "/", fcs_annotation_E2350_df$filename), alter.names = T, truncate_max_range = F,
# ignore.text.offset = T,
# column.pattern = 'BL.B..530_30.A|YG.D..610_20.A|V.F..450_50.A|FSC.A|SSC.A'
# #samples from sorter have different fluorophores, only take the shared ones
# )
flowCore::sampleNames(flowset) = fcs_annotation_E2350_df$exp_name
pData(flowset) = fcs_annotation_E2350_df
flowset_fluo_clones = flowset[,c('BL.B..530_30.A', 'YG.D..610_20.A', 'V.F..450_50.A')]
colnames(flowset_fluo_clones) = c("GFP", "mCherry", "mTurq")
rm(flowset)total_cell_count_tmp = flowCore::keyword(flowset_fluo_clones, "$TOT")[,'$TOT']
total_cell_count = as.numeric(total_cell_count_tmp)
names(total_cell_count) = names(total_cell_count_tmp)
measurement_date = flowCore::keyword(flowset_fluo_clones, "$DATE")[,'$DATE']
median_mTurq = sapply(1:length(flowset_fluo_clones), function(i){
median(exprs(flowset_fluo_clones[[i]])[,'mTurq'])
})
names(median_mTurq) = sampleNames(flowset_fluo_clones)
median_mCherry = sapply(1:length(flowset_fluo_clones), function(i){
median(exprs(flowset_fluo_clones[[i]])[,'mCherry'])
})
names(median_mCherry) = sampleNames(flowset_fluo_clones)
median_GFP = sapply(1:length(flowset_fluo_clones), function(i){
median(exprs(flowset_fluo_clones[[i]])[,'GFP'])
})
names(median_GFP) = sampleNames(flowset_fluo_clones)
pData(flowset_fluo_clones)$median_mTurq = median_mTurq
pData(flowset_fluo_clones)$median_mCherry = median_mCherry
pData(flowset_fluo_clones)$median_GFP = median_GFP
pData(flowset_fluo_clones)$total_cell_count = total_cell_count
pData(flowset_fluo_clones)$measurement_date = measurement_date #date from the fcs file, must be correct
flowset_fluo_clones_data_tib = tibble(pData(flowset_fluo_clones))#set up the transformations I use in the plots
fwd_transf = flowWorkspace::flowjo_biexp(widthBasis = -100, pos = 4.42, inverse = F)
inv_transf = flowWorkspace::flowjo_biexp(widthBasis = -100, pos = 4.42, inverse = T)
#function to draw a line ad the two top peaks
#important: the values for the geom_density_ridges quantile lines need to be sorted non-decreasingly!
fun_high_peaks2 = function(x, ...){
peaks_obj = getPeaks(fwd_transf(x))
N_peaks_to_pick = min(2, length(peaks_obj$Peaks))
# N_peaks_to_pick = 1
top_2_peaks = order(peaks_obj$P.h, decreasing = T)[1:N_peaks_to_pick]
linear_peak = inv_transf(getPeaks(fwd_transf(x))$Peaks[top_2_peaks])
sort(linear_peak)
}pools (and controls) of E2250
#E2250 pools
samples_E2250_pools_paper_idx = pData(flowset_fluo_good)$experiment %in% c("E2250") &
pData(flowset_fluo_good)$type %in% c("control", "sorted_pool") &
pData(flowset_fluo_good)$sorted_phenotype %in% c("none", "mCh-_mT+") &
!pData(flowset_fluo_good)$cell_line %in% c("F121-9")
# #order the samples in a sensible order
levels_pools = pData(flowset_fluo_good[samples_E2250_pools_paper_idx]) %>%
mutate(treatment = factor(treatment, levels = c("delA", "delB", "untreated"))) %>%
mutate(landing_pad = factor(landing_pad, levels = c("8", "23", "C9", "none"))) %>%
arrange(landing_pad, treatment) %>% pull(exp_name)
ggplot(flowset_fluo_good[samples_E2250_pools_paper_idx], aes(x = mTurq, fill = treatment_for_color)) +
facet_nested( factor(landing_pad, levels = c("none", "C9", "23", "8")) ~ ., scales = 'free_y', space = 'free_y') +
geom_density_ridges(aes(y = factor(exp_name, levels = rev(levels_pools))),
size =0.3, # rel_min_height = 0.005,
scale = 1.5,
alpha = 0.75,
quantile_lines = TRUE, quantile_fun = fun_high_peaks2) + #,
ggcyto::scale_x_flowjo_biexp(widthBasis = -100, breaks = c(-10^3, 0, 10^3, 10^4, 10^5),
pos = 4.42) +
scale_y_discrete(expand = expansion( add = c(0, 2))) +
theme_bw() +
theme(legend.position = 'bottom',
axis.title.y = element_blank()) +
colscale_clonescolors: delA and delB (mCh-) two types of pink black for unedited clone grey for WT/F121-9
#select the samples to plot
samples_clones_23_delA = pData(flowset_fluo_good) %>%
filter(type %in% c("clone", "control") &
landing_pad %in% c("23", "none") &
sorted_phenotype %in% c("mCh-_mT+", "none") &
treatment %in% c("untreated", "delA") &
date == "ME20230426") %>% pull(exp_name)
samples_clones_23_delA_idx = pData(flowset_fluo_good)$exp_name %in% samples_clones_23_delA
#order them by mTurq value / the controls in a sensible order
level_order_clones = pData(flowset_fluo_good[samples_clones_23_delA_idx]) %>%
filter(type == "clone") %>%
arrange(median_mTurq) %>% pull(cell_line_clone_name)
level_order <- c("WT", "F121-9", "23_34A", level_order_clones )
#plot
ggplot(flowset_fluo_good[samples_clones_23_delA_idx], aes(x = mTurq, fill = treatment_for_color)) +
# facet_nested( type ~ . , scales = 'free_y', space = 'free_y', render_empty = F) +
geom_density_ridges(aes(y = factor(cell_line_clone_name, levels = level_order)),
size =0.3, # rel_min_height = 0.005,
scale = 2,
alpha = 0.75,
quantile_lines = TRUE, quantile_fun = fun_high_peaks2, vline_width = 0.5) + #+ #,
ggcyto::scale_x_flowjo_biexp(widthBasis = -100, breaks = c(-10^3, 0, 10^3, 10^4, 10^5),
pos = 4.42, limits = c(0, 5*10^4)) +
scale_y_discrete(expand = expansion( add = c(0, 1.5))) +
# geom_vline(data = filter(peaks_linear_tib, exp_name %in% pData(flowset_fluo_good[samples_clones_23_delA_idx])$exp_name),aes(xintercept = mTurq_peak)) +
theme_bw() +
theme(legend.position = 'bottom',
axis.title.y = element_blank(),
aspect.ratio = 1.5) +
colscale_clones#select the samples to plot
samples_clones_23_delB = pData(flowset_fluo_good) %>%
filter(type %in% c("clone", "control") &
landing_pad %in% c("23", "none") &
sorted_phenotype %in% c("mCh-_mT+", "none") &
treatment %in% c("untreated", "delB") &
date == "me20230418") %>% pull(exp_name)
samples_clones_23_delB_idx = pData(flowset_fluo_good)$exp_name %in% samples_clones_23_delB
#order them by mTurq value / the controls in a sensible order
level_order_clones = pData(flowset_fluo_good[samples_clones_23_delB_idx]) %>%
filter(type == "clone") %>%
arrange(median_mTurq) %>% pull(cell_line_clone_name)
level_order <- c("WT", "F121-9", "23_34A", level_order_clones )
#plot
ggplot(flowset_fluo_good[samples_clones_23_delB_idx], aes(x = mTurq, fill = treatment_for_color)) +
# facet_nested( type ~ . , scales = 'free_y', space = 'free_y', render_empty = F) +
geom_density_ridges(aes(y = factor(cell_line_clone_name, levels = level_order)),
size =0.3, # rel_min_height = 0.005,
scale = 2,
alpha = 0.75,
quantile_lines = TRUE, quantile_fun = fun_high_peaks2, vline_width = 0.5) + #+ #,
ggcyto::scale_x_flowjo_biexp(widthBasis = -100, breaks = c(-10^3, 0, 10^3, 10^4, 10^5),
pos = 4.42, limits = c(0, 5*10^4)) +
scale_y_discrete(expand = expansion( add = c(0, 1.5))) +
# geom_vline(data = filter(peaks_linear_tib, exp_name %in% pData(flowset_fluo_good[samples_clones_23_delA_idx])$exp_name),aes(xintercept = mTurq_peak)) +
theme_bw() +
theme(legend.position = 'bottom',
axis.title.y = element_blank(),
aspect.ratio = 1.5) +
colscale_clones#select the samples to plot
samples_clones_8_delB = pData(flowset_fluo_good) %>%
filter(type %in% c("clone", "control") &
landing_pad %in% c("8", "none") &
sorted_phenotype %in% c("mCh-_mT+", "none") &
treatment %in% c("untreated", "delB") &
date == "20231128") %>% pull(exp_name)
samples_clones_8_delB_idx = pData(flowset_fluo_good)$exp_name %in% samples_clones_8_delB
#order them by mTurq value / the controls in a sensible order
level_order_clones = pData(flowset_fluo_good[samples_clones_8_delB_idx]) %>%
filter(type == "clone") %>%
arrange(median_mTurq) %>% pull(cell_line_clone_name)
level_order <- c("WT", "F121-9", "8_34.8", level_order_clones )
#plot
ggplot(flowset_fluo_good[samples_clones_8_delB_idx], aes(x = mTurq, fill = treatment_for_color)) +
# facet_nested( type ~ . , scales = 'free_y', space = 'free_y', render_empty = F) +
geom_density_ridges(aes(y = factor(cell_line_clone_name, levels = level_order)),
size =0.3, # rel_min_height = 0.005,
scale = 2,
alpha = 0.75,
quantile_lines = TRUE, quantile_fun = fun_high_peaks2, vline_width = 0.5) + #+ #,
ggcyto::scale_x_flowjo_biexp(widthBasis = -100, breaks = c(-10^3, 0, 10^3, 10^4, 10^5),
pos = 4.42, limits = c(0, 5*10^4)) +
scale_y_discrete(expand = expansion( add = c(0, 1.5))) +
# geom_vline(data = filter(peaks_linear_tib, exp_name %in% pData(flowset_fluo_good[samples_clones_23_delA_idx])$exp_name),aes(xintercept = mTurq_peak)) +
theme_bw() +
theme(legend.position = 'bottom',
axis.title.y = element_blank(),
aspect.ratio = 1.5) +
colscale_clones#select the samples to plot
samples_clones_C9_delB = pData(flowset_fluo_good) %>%
filter(type %in% c("clone", "control") &
landing_pad %in% c("C9", "none") &
sorted_phenotype %in% c("mCh-_mT+","mCh-_mT-", "none") & #for C9 we sorted mT high and lower clones from the pool, both are mTurq positive and should be included in the plot
treatment %in% c("untreated", "delB") &
date == "20231128") %>% pull(exp_name)
samples_clones_C9_delB_idx = pData(flowset_fluo_good)$exp_name %in% samples_clones_C9_delB
#order them by mTurq value / the controls in a sensible order
level_order_clones = pData(flowset_fluo_good[samples_clones_C9_delB_idx]) %>%
filter(type == "clone") %>%
arrange(median_mTurq) %>% pull(cell_line_clone_name)
level_order <- c("WT", "F121-9", "C9_34", level_order_clones )
#plot
ggplot(flowset_fluo_good[samples_clones_C9_delB_idx], aes(x = mTurq, fill = treatment_for_color)) +
# facet_nested( type ~ . , scales = 'free_y', space = 'free_y', render_empty = F) +
geom_density_ridges(aes(y = factor(cell_line_clone_name, levels = level_order)),
size =0.3, # rel_min_height = 0.005,
scale = 2,
alpha = 0.75,
quantile_lines = TRUE, quantile_fun = fun_high_peaks2, vline_width = 0.5) + #+ #,
ggcyto::scale_x_flowjo_biexp(widthBasis = -100, breaks = c(-10^3, 0, 10^3, 10^4, 10^5),
pos = 4.42, limits = c(0, 5*10^4)) +
scale_y_discrete(expand = expansion( add = c(0, 1.5))) +
# geom_vline(data = filter(peaks_linear_tib, exp_name %in% pData(flowset_fluo_good[samples_clones_23_delA_idx])$exp_name),aes(xintercept = mTurq_peak)) +
theme_bw() +
theme(legend.position = 'bottom',
axis.title.y = element_blank(),
aspect.ratio = 1.5) +
colscale_clones
## Boxplot highest peaks
calculate the normalized / relative medians
# get the autofluorescence for all dates
autofluo_tib = good_clones_tib %>%
group_by(flow_experiment, date) %>%
filter(control_samples == "WT") %>%
summarize(autofluo_mTurq = mean(median_mTurq),
autofluo_mCherry = mean(median_mCherry),
autofluo_GFP = mean(median_GFP)
)
#get normalized corrected fluorescence for all samples
good_clones_tib_norm_tmp = good_clones_tib %>%
left_join(autofluo_tib) %>%
#autofluorescence corrected (_cor)
mutate(median_mTurq_cor = median_mTurq - autofluo_mTurq,
median_mCherry_cor = median_mCherry - autofluo_mCherry,
median_GFP_cor = median_GFP - autofluo_GFP) %>%
#normalized to GFP (_norm)
mutate(mTurq_norm = median_mTurq_cor / median_GFP_cor,
mCherry_norm = median_mCherry_cor / median_GFP_cor)
# get standard values (23_34A) for all dates
standard_tib = good_clones_tib_norm_tmp %>%
group_by(flow_experiment, date) %>%
filter(control_samples == "23_34A") %>%
summarize(
#mean autofluo corrected
mTurq_cor_st = mean(median_mTurq_cor),
mCherry_cor_st = mean(median_mCherry_cor),
GFP_cor_st = mean(median_GFP_cor),
# mean of normalized to GFP
mTurq_norm_st = mean(mTurq_norm),
mCherry_norm_st = mean(mCherry_norm),
#mean raw median values
mTurq_st = mean(median_mTurq),
mCherry_st = mean(median_mCherry),
GFP_st = mean(median_GFP)
)
#get relative expression values for all samples
good_clones_tib_norm = good_clones_tib_norm_tmp %>%
left_join(standard_tib) %>%
# autofluorescence corrected and relative to 23_34A (_cor_rel)
mutate(mTurq_cor_rel = median_mTurq_cor / mTurq_cor_st,
mCherry_cor_rel = median_mCherry_cor / mCherry_cor_st,
GFP_cor_rel = median_GFP_cor / GFP_cor_st,
# autofluoresnce corrected, normalized to GFP and relative to 23_34A
mTurq_norm_rel = mTurq_norm / mTurq_norm_st,
mCherry_norm_rel = mCherry_norm / mCherry_norm_st,
# relative to 23_34A only (no autofluorescence correction etc)
mTurq_rel = median_mTurq / mTurq_st,
mCherry_rel = median_mCherry / mCherry_st,
GFP_rel = median_GFP / GFP_st )find the highest peaks and normalize them
#samples to calculate
samples_oi_tib = pData(flowset_fluo_good) %>%
filter(type %in% c("clone", "control")) %>%
filter(flowcytometer == "analytical")
samples_oi = samples_oi_tib$exp_name
#find the two highest peaks in the density plot (transformed back to linear space)
# just take the two highest peaks for each sample, if it shows two peaks (unbiased approach)
peaks_linear_list = lapply(samples_oi, function(smpl){
mTurq_expr = exprs(flowset_fluo_good[[smpl]])[,'mTurq']
peaks_obj = getPeaks(fwd_transf(mTurq_expr))
sample_type = pData(flowset_fluo_good) %>% filter(sample_name == smpl) %>% pull(type)
N_peaks_to_pick = min(2, length(peaks_obj$Peaks)) #get two peaks if there are 2 or more, get only one if one exists
top_2_peaks = order(peaks_obj$P.h, decreasing = T)[1:N_peaks_to_pick]
linear_peak = inv_transf(getPeaks(fwd_transf(mTurq_expr))$Peaks[top_2_peaks])
linear_peak
})
names(peaks_linear_list) = samples_oi
#arrange in a tibble
peaks_linear_tib = tibble(exp_name = rep(names(peaks_linear_list), lengths(peaks_linear_list)),
mTurq_peak = unlist(peaks_linear_list)) %>%
left_join(samples_oi_tib)
#correct, normalize and scale the peak data
peaks_linear_tib_scaled = peaks_linear_tib %>%
left_join(good_clones_tib_norm) %>%
mutate(mTurq_peak_cor = mTurq_peak - autofluo_mTurq,
mTurq_peak_norm = mTurq_peak_cor/median_GFP_cor,
mTurq_peak_rel = mTurq_peak_norm/mTurq_norm_st,
mTurq_peak_rel_uncor = mTurq_peak / mTurq_st) %>%
#mark the highest peak per sample
group_by(exp_name) %>%
mutate(highest_peak = mTurq_peak_rel >= max(mTurq_peak_rel) & is.finite(mTurq_peak_rel),
highest_peak_uncor = mTurq_peak_rel_uncor >= max(mTurq_peak_rel_uncor) )Plot
#highest peak only
tib_highest_peak_clones = filter(peaks_linear_tib_scaled, is.na(flowcytometer) | flowcytometer == "analytical" ) %>%
filter(GFP_state == "GFP+") %>%
filter(flow_experiment %in% c("E2250", "E2254", "E2350", "E2427")) %>%
filter((type %in% c("clone", "control") & flow_experiment != "E2427") ) %>%
filter(is.na(cell_line) | cell_line != "WT") %>% #cannot be normalized to GFP
mutate(landing_pad = factor(landing_pad, levels = c("none", "C9", "23", "8"),
labels = c("no insert", "-161 kb", "-116 kb", "-39 kb")),
treatment = factor(treatment, levels = c("untreated", "delA", "delB"),
labels = c("untreated", "ΔSox2", "ΔCBS_Sox2"))) %>%
filter(highest_peak)
#adding statistics does not work well for a faceted plot with missing combinations
#instead I just calculate the statistics separately and then add to the plot
library("rstatix")
test_116 = tib_highest_peak_clones %>%
ungroup() %>%
filter(landing_pad == "-116 kb") %>%
t_test(formula = mTurq_peak_rel ~ treatment,
var.equal = F) %>%
mutate(landing_pad = "-116 kb")
test_39 = tib_highest_peak_clones %>%
ungroup() %>%
filter(landing_pad == "-39 kb") %>%
mutate(treatment = factor(treatment, levels = c("untreated", "ΔCBS_Sox2"))) %>%
t_test(formula = mTurq_peak_rel ~ treatment,
var.equal = F) %>%
mutate(landing_pad = "-39 kb")
max_vals_plot = tib_highest_peak_clones %>%
ungroup() %>%
group_by(landing_pad, treatment) %>%
summarize(max_val = max(mTurq_peak_rel))
stats_tib = bind_rows(test_116, test_39) %>%
left_join(max_vals_plot, by = join_by(landing_pad == landing_pad, group2 == treatment)) %>%
mutate(adj_y = c(2, 5, 2, 4),
y.position = max_val + adj_y)
# make the plot
ggplot(tib_highest_peak_clones, aes(x = treatment, y = mTurq_peak_rel, col = treatment_for_color)) +
facet_nested(. ~ factor(landing_pad, levels = c("no insert", "-161 kb", "-116 kb", "-39 kb")) ,
scales = 'free_x', space = 'free_x') +
geom_hline(yintercept = 1, linetype = 'dashed') +
geom_hline(yintercept = 0) +
#add data
geom_boxplot(position = "dodge", outlier.shape = NA)+
geom_point(position = position_jitterdodge(jitter.width = 0.6, jitter.height = 0), size = 2.5, alpha = 0.5)+
#add statistics
stat_pvalue_manual(stats_tib, label = "p") +
#layout
scale_x_discrete(guide = guide_axis(angle = 90))+
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
ylab("relative reporter expression\n(high peak)") +
theme_bw(base_size = 14) +
theme(legend.position = 'none') +
colscale_clonesmedian_rep_per_treatment = tib_highest_peak_clones %>%
group_by(cell_line, treatment) %>%
summarize(median_rel_reporter = median(mTurq_peak_rel))
#change vs untreated, LP23 and LP8
median_rep_per_treatment %>%
filter(cell_line == "23_34A") %>%
mutate(change_reporter_vs_untr = median_rel_reporter / filter(median_rep_per_treatment, cell_line == "23_34A" & treatment == "untreated")$median_rel_reporter)## # A tibble: 3 × 4
## # Groups: cell_line [1]
## cell_line treatment median_rel_reporter change_reporter_vs_untr
## <chr> <fct> <dbl> <dbl>
## 1 23_34A untreated 0.867 1
## 2 23_34A ΔSox2 23.5 27.1
## 3 23_34A ΔCBS_Sox2 30.2 34.8
median_rep_per_treatment %>%
filter(cell_line == "8_34.8") %>%
mutate(change_reporter_vs_untr = median_rel_reporter / filter(median_rep_per_treatment, cell_line == "8_34.8" & treatment == "untreated")$median_rel_reporter)## # A tibble: 2 × 4
## # Groups: cell_line [1]
## cell_line treatment median_rel_reporter change_reporter_vs_untr
## <chr> <fct> <dbl> <dbl>
## 1 8_34.8 untreated 2.54 1
## 2 8_34.8 ΔCBS_Sox2 8.74 3.44
#deletion LP23 vs LP8
median_rep_per_treatment %>%
filter(cell_line == "23_34A") %>%
mutate(del_23_vs_8 = median_rel_reporter / filter(median_rep_per_treatment, cell_line == "8_34.8" & treatment == "ΔCBS_Sox2")$median_rel_reporter)## # A tibble: 3 × 4
## # Groups: cell_line [1]
## cell_line treatment median_rel_reporter del_23_vs_8
## <chr> <fct> <dbl> <dbl>
## 1 23_34A untreated 0.867 0.0992
## 2 23_34A ΔSox2 23.5 2.69
## 3 23_34A ΔCBS_Sox2 30.2 3.45
#Hopping experiment
#load the -116kb data (LP23)
tib_23_all_data = read_tsv(path_tib_23_ints) %>%
filter(chr %in% paste0("chr", c(1:19, "X"))) %>%
mutate(chr = factor(chr, levels = paste0("chr", c(1:19, "X"))))
#the control pool in E2285 (rep3) was sequenced as 10 separate libraries, combine them into one sample
tib_23_E2285_ctrls = tib_23_all_data %>%
filter(replicate == "rep3" & insert == "Sox2P" & sample_type == "control pool") %>%
mutate(sample_name = "Sox2P-reporter -116 kb, unsorted control pool rep3")%>%
group_by(chr, start, end, seq, strand, replicate, experiment, launch_pad_location, insert, genotype, sample_type, sorted_gate, pool_nr, sample_name) %>%
summarize(read_count = sum(read_count),
read_count_1 = sum(read_count_1),
read_count_2 = sum(read_count_2), .groups = "keep")
tib_23 = tib_23_all_data %>%
#select the rest of the data
filter(!(replicate == "rep3" & insert == "Sox2P" & sample_type == "control pool")) %>%
select(chr, start, end, seq, strand, replicate, experiment, launch_pad_location, insert, genotype, sample_type, sorted_gate, pool_nr, sample_name,
read_count, read_count_1, read_count_2)%>%
#add merged controls back in
bind_rows(tib_23_E2285_ctrls) %>%
#mark hopped integrations
mutate(hopped = start != 34643961)
#load the -161kb mCh+ data (LP C9)
tib_C9_mChpos = read_tsv(path_tib_C9_mChpos_ints) %>%
filter(chr %in% paste0("chr", c(1:19, "X"))) %>%
mutate(chr = factor(chr, levels = paste0("chr", c(1:19, "X")))) %>%
#combine the 2 libraries of "Sox2P-reporter -161 kb, population P1 pool1 rep2"
mutate(to_merge = case_when(replicate == "rep2" & insert == "Sox2P" & sorted_gate == "P1" ~ "merge",
.default = sample_name)) %>%
mutate(sample_name = case_when(to_merge == "merge" ~ "Sox2P-reporter -161 kb, population P1 pool1 rep2",
.default = sample_name)) %>%
group_by(chr, start, end, seq, strand, replicate, experiment, launch_pad_location, insert, genotype, sample_type, sorted_gate, pool_nr, sample_name) %>%
summarize(read_count = sum(read_count),
read_count_1 = sum(read_count_1),
read_count_2 = sum(read_count_2), .groups = "keep") %>%
#mark hopped integrations
mutate(hopped = start != 34598479)
#load the -161kb mCh+ data (LP C9)
tib_C9_mChneg = read_tsv(path_tib_C9_mChneg_ints) %>%
filter(chr %in% paste0("chr", c(1:19, "X"))) %>%
mutate(chr = factor(chr, levels = paste0("chr", c(1:19, "X")))) %>%
group_by(chr, start, end, seq, strand, replicate, experiment, launch_pad_location, insert, genotype, sample_type, sorted_gate, pool_nr, sample_name) %>%
summarize(read_count = sum(read_count),
read_count_1 = sum(read_count_1),
read_count_2 = sum(read_count_2), .groups = "keep") %>%
#mark hopped integrations
mutate(hopped = start != 34598479)combine and filter
#filter the integrations
tib_filtered <- bind_rows(tib_23,tib_C9_mChpos, tib_C9_mChneg) %>%
#add variables used in the plotting (old names of cell lines etc)
mutate(population = case_when(sample_type == "control pool" ~ "ctrl",
.default = sorted_gate),
cell_line = case_when(launch_pad_location == "-116 kb" & insert == "Sox2P" ~ "23_34A",
launch_pad_location == "-161 kb" & insert == "Sox2P" & genotype == "WT" ~ "C9_mCh_34",
launch_pad_location == "-161 kb" & insert == "Sox2P" & genotype == "Sox2::mCherry deleted" ~ "C9_mCh-_34"
),
landing_pad = case_when(launch_pad_location == "-116 kb" ~ "23",
launch_pad_location == "-161 kb" ~ "C9")) %>%
#annotate confidence of mapping (one or both ITRs)
mutate(mapped_arms = case_when(read_count_1 == 0 ~ "rv_only",
read_count_2 == 0 ~ "fw_only",
read_count_1 > 0 & read_count_2 > 0 ~ "both_arms")) %>%
#filter the data
filter(population == "ctrl" | read_count >= 2) %>% #for ctrl distribution 1 mapping read is enough
filter(!(start >= 34721183 & start <= 34721192)) %>% #remove contamination from LP8
filter(! start %in% c(25018987, 35232946) ) %>% #remove contamination clone C1
filter(strand %in% c("+", "-"))
tib_filtered_P1_strict = tib_filtered %>%
filter(!(population == "P1" & mapped_arms != "both_arms")) #only keep P1 integrations with 2 mapped arms
all_exp = unique(tib_filtered_P1_strict$experiment)P_RANGE = c(34.5E6, 34.9E6)
tib_for_plot = filter(tib_filtered_P1_strict,
hopped & chr == "chr3" &
cell_line %in% c("C9_mCh-_34", "C9_mCh_34", "23_34A") &
start >= P_RANGE[1] & start <= P_RANGE[2]) %>%
mutate(cell_line = case_when(
cell_line %in% c("23_34A", "C9_mCh_34") ~ "mChpos_34",
.default = cell_line)) %>%
mutate(CL_population = paste0(cell_line, "_", population)) %>%
filter(cell_line == "C9_mCh-_34" | population %in% c("P1", "P6"))
LP_tib_cl = tibble(cell_line = c("C9_mCh-_34", "mChpos_34", "mChpos_34"),
start = c(start(landingPad_C9),
start(landingPad_23),
start(landingPad_C9)))
PLOT_ANNOT = T
plot_beesw_comb = ggplot(filter(tib_for_plot),
aes(x = start, y = population, col = CL_population)) +
facet_grid(cell_line ~ ., scales = 'free_y', space = 'free') + #only facet when necessary
list( #add elements in a list, you cannot use + inside an if statement
# annotate enhancer
geom_vline(xintercept = c(start(enhancer), end(enhancer)), col = brown),
annotate("rect", xmin = start(enhancer), xmax = end(enhancer), ymin = -Inf, ymax = Inf, alpha = .2, fill = brown),
# annotate gene
annotate("rect", xmin = start(Sox2_gene), xmax = end(Sox2_gene), ymin = -Inf, ymax = Inf, alpha = .2, fill = 'red'),
geom_vline(xintercept = c(start(Sox2_gene), end(Sox2_gene)), col = 'red')) +
geom_vline(data = LP_tib_cl, aes(xintercept = start), col = 'darkgrey') +
# # annotate CTCFs
geom_vline(xintercept = start(CTCF_mm10.chr3_extra), col = 'darkgrey', lty="11") +
#annotate landing pad
labs(x='Genomic coordinate (Mb)',y='sorted gate') +
#datapoints
geom_quasirandom(alpha = 0.5, size = 1.5, varwidth = T, bandwidth = 0.8,
# shape = 16, #changes to point without border, so alpha applies to the whole point
method = "quasirandom",
stroke = NA) +
geom_text(data = plyr::count(tib_for_plot, vars = c("cell_line", "population")), aes(label = paste("n = ", freq, sep = ""),
x = Inf),
hjust = "inward", vjust = "top",
col = 'black') +
#layout:
theme_classic(base_size = 16) +
theme(legend.position = "none") +
scale_x_continuous(breaks = scales::pretty_breaks(n = 6), labels = scales::unit_format(scale = 1E-6, accuracy = 0.01, unit=NULL),
limits=P_RANGE, expand=c(0,0)) +
scale_y_discrete(limits = rev) +
colScale_pop_CL
plot_beesw_comb
we estimated the borders based on the P6 integrations, plot these
estimated borders on the beeswarm
plot_beesw_comb +
geom_vline(xintercept = c(34594566, 34805249), col = "#9c2261") + #estimated borders after Sox2 deletion
geom_vline(xintercept = c(34780523, start(landingPad_23)), col = "#212B71") #estimated original borderscalculate old and new domain size (and distances of the borders from the gene/SCR)
## [1] 55.429
## [1] 38848
old_domainsize = 34780523 - start(landingPad_23) #first P6 after SCR as border, use LP as upstr border (there are P6 closer but there are just a lost of integrations there)
new_domainsize = 34805249 - 34594566
new_domainsize / old_domainsize## [1] 1.542753
Plotting triangles in a pdf is a nightmare, so instead 1 just make one plot with the CTCF lines colored by orientation and based on that add the CTCF triangles manually to the paper figures.
ggplot() +
# annotate enhancer
# annotate("rect", xmin = start(enhancer), xmax = end(enhancer), ymin = -Inf, ymax = Inf, alpha = .2, fill = brown)+
geom_vline(xintercept = c(start(enhancer), end(enhancer)), col = brown)+
# annotate gene
# annotate("rect", xmin = start(Sox2_gene), xmax = end(Sox2_gene), ymin = -Inf, ymax = Inf, alpha = .2, fill = 'red')+
geom_vline(xintercept = c(start(Sox2_gene), end(Sox2_gene)), col = 'red') +
geom_vline(data = as_tibble(CTCF_mm10.chr3_extra), aes(xintercept = start, col = strand), size = 2, lty = 'dashed') +
scale_x_continuous(breaks = scales::pretty_breaks(n = 6), labels = scales::unit_format(scale = 1E-6, accuracy = 0.01, unit=NULL),
limits=c(P_RANGE), expand=c(0,0)) +
theme_bw()library(openCyto)
library(CytoML)
library(flowWorkspace)
diva_ws <- open_diva_xml(diva_xml_file)
gs <- diva_to_gatingset(diva_ws,
name = 1, #the group to import
path = path_FACS_sorting_E2555,
#swapped columns are a known diva 'bug', for some files you need to 'unswap' them, default is T (as necessary for this file)
# swap_cols = F,
worksheet = "global",
verbose = F,
execute = T)
pdata_tib = pData(gs) %>%
as_tibble() %>%
mutate(name_short = str_remove(str_remove(name, "ME2024050._E2534_rep._"), "_([0-9]){3}.fcs"),
replicate = str_extract(name_short, "rep.$"),
cell_line = case_when(grepl("WT", name_short) ~ "WT",
grepl("F121_9", name_short) ~ "F121_9",
grepl("23_34A", name_short) ~ "23_34A",
.default = str_extract(name_short, "C9_.._mCh.")),
treatment = case_when(grepl("PB", name_short) ~ "PB",
grepl("SB", name_short) ~ "SB",
.default = "untreated"),
recording = case_when(grepl("_SORT", name_short) ~ "sorting",
.default = "pre-recording"),
sample_type = case_when(cell_line %in% c("WT", "F121_9", "23_34A") | treatment == "untreated" ~ "control",
.default = "sample")
) %>%
mutate(construct = case_when(cell_line %in% c("WT", "F121_9") ~ "none",
cell_line == c("23_34A") ~ "34",
.default = str_extract(cell_line, "([0-9]){2}")),
mCh_status = case_when(cell_line == "WT" ~ "mCh-",
cell_line %in% c("23_34A", "F121_9") ~ "mCh+",
.default = str_extract(cell_line, "mCh.")))
pdata_df = as.data.frame(pdata_tib)
rownames(pdata_df) = pdata_df$name
pData(gs) = pdata_dfgs_oi_mChneg = subset(gs, sample_type == "sample" & mCh_status == "mCh-" & construct == "34" &
(recording == "sorting" | treatment == "PB")) #sorting only, combining 2 data sets doesn't give the right percentages on the gates
ggcyto::ggcyto(gs_oi_mChneg, aes(x = "GFP", y = "mTurqoise2"), subset = "mCh_neg") +
#facet by cell line / treatment
facet_grid(cell_line ~ treatment) +
#plot cells
geom_hex(bins = 128, aes( fill = after_stat(ncount))) +
scale_fill_distiller(palette = 'Spectral') + #manually adding a fill scale also forces the scale to be linear again.
#plot_gates
ggcyto::geom_gate(paste0("P", 1:6, "_mCh-"), col = 'black') +
ggcyto::geom_stats(adjust = c(-0.3, 0.5), digits = 2, size = 3) +
#layout
theme_bw(base_size = 14) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
legend.position = "none",
aspect.ratio = 1) +
ggtitle(element_blank())+
ggcyto::axis_x_inverse_trans() +
ggcyto::axis_y_inverse_trans() +
ggcyto::ggcyto_par_set(limits = list(x = c(1.2, 4), y = c(0, 4.2))) +
scale_y_continuous(expand = c(0,0)) +
scale_x_continuous(expand = c(0,0))+
ggcyto::labs_cyto(labels = "marker")